# Copyright (c) 2024, NVIDIA CORPORATION.

from collections.abc import Mapping

from typing_extensions import Self

from pylibcudf.expressions import Expression
from pylibcudf.io.types import (
    CompressionType,
    DictionaryPolicy,
    PartitionInfo,
    SinkInfo,
    SourceInfo,
    StatisticsFreq,
    TableInputMetadata,
    TableWithMetadata,
)
from pylibcudf.table import Table

class ParquetReaderOptions:
    # Stub for the parquet reader options object. A builder is obtained via
    # ``builder(source)`` and finalized with
    # ``ParquetReaderOptionsBuilder.build()``, which returns an instance of
    # this class.
    def __init__(self) -> None: ...
    # Setters are annotated ``-> None`` to agree with the setters declared on
    # ``ParquetWriterOptions`` and ``ChunkedParquetWriterOptions``.
    #
    # row_groups is a list of lists of row-group indices (presumably one
    # inner list per source -- confirm against parquet.pyx).
    def set_row_groups(self, row_groups: list[list[int]]) -> None: ...
    # nrows / skip_rows are plain ints; any sentinel values or valid ranges
    # are not visible in this stub.
    def set_num_rows(self, nrows: int) -> None: ...
    def set_skip_rows(self, skip_rows: int) -> None: ...
    # col_names is a list of column-name strings.
    def set_columns(self, col_names: list[str]) -> None: ...
    # filter is a pylibcudf ``Expression``. The parameter name shadows the
    # ``filter`` builtin but is part of the keyword interface, so it stays.
    def set_filter(self, filter: Expression) -> None: ...
    # Entry point for constructing options: returns a builder bound to the
    # given source.
    @staticmethod
    def builder(source: SourceInfo) -> ParquetReaderOptionsBuilder: ...

class ParquetReaderOptionsBuilder:
    # Stub for the builder returned by ``ParquetReaderOptions.builder``.
    # Every configuration method takes a single bool and is annotated
    # ``-> Self``, so calls can be chained before ``build()``.
    def __init__(self) -> None: ...
    def convert_strings_to_categories(self, val: bool) -> Self: ...
    def use_pandas_metadata(self, val: bool) -> Self: ...
    def allow_mismatched_pq_schemas(self, val: bool) -> Self: ...
    def use_arrow_schema(self, val: bool) -> Self: ...
    # Finalizes the builder into a ``ParquetReaderOptions``.
    def build(self) -> ParquetReaderOptions: ...

class ChunkedParquetReader:
    # Stub for a parquet reader that hands back its result piecewise.
    # Intended usage (inferred from the method names and return types --
    # confirm against parquet.pyx): call ``read_chunk()`` while ``has_next()``
    # returns True; each call yields a ``TableWithMetadata``.
    def __init__(
        self,
        source_info: SourceInfo,
        # None is the default for both selectors; presumably that means
        # "no restriction" -- confirm against parquet.pyx.
        columns: list[str] | None = None,
        row_groups: list[list[int]] | None = None,
        use_pandas_metadata: bool = True,
        convert_strings_to_categories: bool = False,
        skip_rows: int = 0,
        # NOTE(review): nrows defaults to 0 here but to -1 in read_parquet;
        # confirm which value matches the implementation in parquet.pyx.
        nrows: int = 0,
        # Presumably size limits in bytes, with 0 meaning "no limit" for
        # chunk_read_limit -- TODO confirm against parquet.pyx.
        chunk_read_limit: int = 0,
        pass_read_limit: int = 1024000000,
        allow_mismatched_pq_schemas: bool = False,
    ) -> None: ...
    def has_next(self) -> bool: ...
    def read_chunk(self) -> TableWithMetadata: ...

# Stub for the one-shot parquet read entry point: takes a ``SourceInfo`` plus
# optional selection/behaviour arguments and returns a ``TableWithMetadata``.
# Only ``source_info`` is required; every other parameter has a default.
def read_parquet(
    source_info: SourceInfo,
    columns: list[str] | None = None,
    row_groups: list[list[int]] | None = None,
    filters: Expression | None = None,
    convert_strings_to_categories: bool = False,
    use_pandas_metadata: bool = True,
    skip_rows: int = 0,
    # -1 is the default; presumably it means "read all rows" -- confirm
    # against parquet.pyx.
    nrows: int = -1,
    allow_mismatched_pq_schemas: bool = False,
    # The two parameters below are currently disabled; see the comment in
    # parquet.pyx for the reason.
    # reader_column_schema: ReaderColumnSchema = *,
    # timestamp_type: DataType = *
) -> TableWithMetadata: ...

class ParquetWriterOptions:
    # Stub for the options object accepted by ``write_parquet``. A builder is
    # obtained via ``builder(sink, table)`` and finalized with
    # ``ParquetWriterOptionsBuilder.build()``, which returns an instance of
    # this class.
    def __init__(self) -> None: ...
    @staticmethod
    def builder(
        sink: SinkInfo, table: Table
    ) -> ParquetWriterOptionsBuilder: ...
    # Setters; each is declared to return ``None``.
    def set_partitions(self, partitions: list[PartitionInfo]) -> None: ...
    def set_column_chunks_file_paths(self, file_paths: list[str]) -> None: ...
    # The size arguments below are plain ints; their units beyond what the
    # names say (bytes / rows) and their valid ranges are not visible in
    # this stub -- see parquet.pyx.
    def set_row_group_size_bytes(self, size_bytes: int) -> None: ...
    def set_row_group_size_rows(self, size_rows: int) -> None: ...
    def set_max_page_size_bytes(self, size_bytes: int) -> None: ...
    def set_max_page_size_rows(self, size_rows: int) -> None: ...
    def set_max_dictionary_size(self, size_bytes: int) -> None: ...

class ParquetWriterOptionsBuilder:
    # Stub for the builder returned by ``ParquetWriterOptions.builder``.
    # Every configuration method is annotated ``-> Self``, so calls can be
    # chained before ``build()``.
    def __init__(self) -> None: ...
    def metadata(self, metadata: TableInputMetadata) -> Self: ...
    # A list of str -> str mappings (presumably one mapping per output
    # sink -- confirm against parquet.pyx).
    def key_value_metadata(
        self, metadata: list[Mapping[str, str]]
    ) -> Self: ...
    def compression(self, compression: CompressionType) -> Self: ...
    def stats_level(self, sf: StatisticsFreq) -> Self: ...
    def int96_timestamps(self, enabled: bool) -> Self: ...
    def write_v2_headers(self, enabled: bool) -> Self: ...
    def dictionary_policy(self, val: DictionaryPolicy) -> Self: ...
    def utc_timestamps(self, enabled: bool) -> Self: ...
    def write_arrow_schema(self, enabled: bool) -> Self: ...
    # Finalizes the builder into a ``ParquetWriterOptions``.
    def build(self) -> ParquetWriterOptions: ...

# Stub for the parquet write entry point: takes a fully built
# ``ParquetWriterOptions`` and returns a ``memoryview``.
# NOTE(review): what the returned buffer contains is not visible in this
# stub -- confirm against parquet.pyx.
def write_parquet(options: ParquetWriterOptions) -> memoryview: ...

class ParquetChunkedWriter:
    # Stub for the chunked parquet writer: created with ``from_options``,
    # fed tables through ``write`` and finished with ``close``.
    def __init__(self) -> None: ...
    # NOTE(review): the element type of metadata_file_path is not declared
    # (bare ``list``); presumably file-path strings -- confirm against
    # parquet.pyx before narrowing the annotation.
    def close(self, metadata_file_path: list) -> memoryview: ...
    def write(self, table: Table) -> None: ...
    # ``Self`` is not permitted in a static method (there is no ``self`` or
    # ``cls`` for it to bind to), so the return type names the class
    # explicitly.
    @staticmethod
    def from_options(
        options: ChunkedParquetWriterOptions,
    ) -> ParquetChunkedWriter: ...

class ChunkedParquetWriterOptions:
    # Stub for the options object accepted by
    # ``ParquetChunkedWriter.from_options``. A builder is obtained via
    # ``builder(sink)`` and finalized with
    # ``ChunkedParquetWriterOptionsBuilder.build()``, which returns an
    # instance of this class.
    def __init__(self) -> None: ...
    def set_dictionary_policy(self, policy: DictionaryPolicy) -> None: ...
    @staticmethod
    def builder(sink: SinkInfo) -> ChunkedParquetWriterOptionsBuilder: ...

class ChunkedParquetWriterOptionsBuilder:
    # Stub for the builder returned by ``ChunkedParquetWriterOptions.builder``.
    # Every configuration method is annotated ``-> Self``, so calls can be
    # chained before ``build()``.
    def __init__(self) -> None: ...
    def metadata(self, metadata: TableInputMetadata) -> Self: ...
    # A list of str -> str mappings (presumably one mapping per output
    # sink -- confirm against parquet.pyx).
    def key_value_metadata(
        self, metadata: list[Mapping[str, str]]
    ) -> Self: ...
    def compression(self, compression: CompressionType) -> Self: ...
    def stats_level(self, sf: StatisticsFreq) -> Self: ...
    # The size arguments below are plain ints; their valid ranges are not
    # visible in this stub -- see parquet.pyx.
    def row_group_size_bytes(self, val: int) -> Self: ...
    def row_group_size_rows(self, val: int) -> Self: ...
    def max_page_size_bytes(self, val: int) -> Self: ...
    def max_page_size_rows(self, val: int) -> Self: ...
    def max_dictionary_size(self, val: int) -> Self: ...
    def write_arrow_schema(self, enabled: bool) -> Self: ...
    # Finalizes the builder into a ``ChunkedParquetWriterOptions``.
    def build(self) -> ChunkedParquetWriterOptions: ...

# Stub: takes a list and returns a ``memoryview``. The element type of the
# list is not declared here (presumably per-file metadata buffers -- confirm
# against parquet.pyx).
# NOTE(review): ``metdata_list`` looks like a typo of ``metadata_list``; it is
# left as-is because the name is part of the keyword interface -- verify
# against the implementation before renaming.
def merge_row_group_metadata(metdata_list: list) -> memoryview: ...
